*************************************************************************************************
/*	
 	Purpose: 																
	-Clean nat'l survey families + hhs (nfsh), 1987-1988					
	-Attach 1950 ANES occ codes to nsfh occupations							
																								
	Creates: fam_hh_cleaned.dta  											
*/																								
*************************************************************************************************

* Session setup: no output paging, empty memory, and the NSFH data-source
* folder as the working directory (all later paths are relative to it).
set more off
clear
cd "$Mydirectory1/1_DataSources/natl_survey_fam_HH/"

* Raw NSFH data and the ICPSR-provided clean-up syntax. Both are downloaded from
* the ICPSR website and keep their ICPSR file names.
* NOTE(review): the file prefix is 06041, but the earlier note said to search for
* "ICPSR 6401" -- confirm the study number.
use "./input/06041-0001-Data"
quietly do "./input/06041-0001-Supplemental_syntax"

* --- Weight ---
* AWEIGHT is the individual case weight, recommended by the NSFH for
* individual-level analysis. Per the NSFH codebook instructions it is divided
* by 10,000 to get the right number of decimals.
replace AWEIGHT = AWEIGHT / 10000

/*------------------------------------------------------------------------------------------

											CLEANING

------------------------------------------------------------------------------------------*/

* Respondent income item.
rename S208 r_income

* --- Demographics: gender ---
rename M2DP01 sex
tabulate sex, missing

* --- Demographics: age ---
rename M2BP01 age
* 95 is a top code ("95 or above"), so the exact age is unknown.
replace age = . if age == 95
generate agesq = age * age

* Sample restriction: keep ages 30 through 50 (missing ages are dropped too).
drop if !inrange(age, 30, 50)

******************
* Marital Status *
******************

* One 0/1 indicator per marital-status code of M2CP01. Each indicator is left
* missing when M2CP01 is missing. The two lists below are parallel: the k-th
* name is built from the k-th code.
local ms_names married never_married widowed divorced separated
local ms_codes 1 5 4 3 2
forvalues k = 1/5 {
	local ms_name : word `k' of `ms_names'
	local ms_code : word `k' of `ms_codes'
	generate `ms_name' = (M2CP01 == `ms_code') if M2CP01 < .
	tabulate `ms_name', missing
}

******************
* Race *
******************

* Two-category race recode of M484:
*   M484 == 1       -> 2 (black)
*   1 < M484 <= 6   -> 1 (white, some categories of latinx, other hispanic)
*   anything else   -> missing
generate race = cond(M484 == 1, 2, cond(M484 > 1 & M484 <= 6, 1, .))
tabulate race, missing
label variable race "Respondent's Race (re-coded as white, black, missing)"

* Black indicator, missing whenever race is missing.
generate black = (race == 2) if !missing(race)
tabulate black, missing
label variable black "Dummy =1 if Respondent is Black"

******************
* Foreign Born *
******************

* Respondent: flagged foreign born when the birthplace code M497A is above 51
* and is not 990. The flag is missing when M497A is system missing.
generate foreignborn = (M497A > 51 & M497A != 990) if M497A != .
tabulate foreignborn, missing

* Restrict to the native born; respondents with unknown status are kept.
drop if !(foreignborn == 0 | foreignborn == .)

*father foreign--not available

*-------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------*

*************************************************************************************
* State/Region Where Respondent Grew Up or Was Born *
*************************************************************************************

***********************
* State R was born in * 
***********************

* The values of M497A are not identical to state FIPS codes, so the crosswalk
* is matched on the decoded state name.
decode M497A, generate(state_name)
replace state_name = "Washington DC" if state_name == "District of Columbia"

* The crosswalk file is merged directly; merge does not need a pre-sorted copy
* of the using data. Crosswalk rows with no respondent are discarded.
sort state_name
merge m:1 state_name using "../Crosswalks/state_name_ab_fips_xwalk.dta"
drop if _merge == 2
drop _merge

* Free the generic names so the same crosswalk can be merged again later.
rename state_name place_born
rename state_fips bpl
label variable bpl "R Birthplace, FIPS codes"

***********************
* Region R was born in * 
***********************
* Census region of birth, assigned from the birth-state FIPS code (bpl).
generate region4_born = .
* 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island,
*     Vermont, New Jersey, New York, Pennsylvania
replace region4_born = 1 if inlist(bpl, 9, 23, 25, 33, 44, 50, 34, 36, 42)
* 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas,
*     Minnesota, Missouri, Nebraska, North Dakota, South Dakota
replace region4_born = 2 if inlist(bpl, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland,
*     North Carolina, South Carolina, Virginia, West Virginia, Alabama, Kentucky,
*     Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
replace region4_born = 3 if inlist(bpl, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
* 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
*     California, Oregon, Washington, plus Alaska (2) and Hawaii (15)
replace region4_born = 4 if inlist(bpl, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
tabulate region4_born, missing
label variable region4_born "Region R born"

* Born-in-the-South indicator, missing when the birth region is unknown.
generate bornsouth = (region4_born == 3) if region4_born != .
tabulate bornsouth, missing

******************************************
* State and Region R grew up in * 
******************************************

* The values of M499A are not identical to state FIPS codes, so the crosswalk
* is matched on the decoded state name.
decode M499A, generate(state_name)
replace state_name = "Washington DC" if state_name == "District of Columbia"

* The crosswalk file is merged directly; merge does not need a pre-sorted copy
* of the using data. Crosswalk rows with no respondent are discarded.
sort state_name
merge m:1 state_name using "../Crosswalks/state_name_ab_fips_xwalk.dta"
drop if _merge == 2

rename state_name place_grewup
rename state_fips state_childhood
tabulate state_childhood, missing
label variable state_childhood "R state grew up in, FIPS codes"

drop _merge

* Census region where R grew up, assigned from the childhood-state FIPS code.
generate region4_childhood = .
* 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island,
*     Vermont, New Jersey, New York, Pennsylvania
replace region4_childhood = 1 if inlist(state_childhood, 9, 23, 25, 33, 44, 50, 34, 36, 42)
* 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas,
*     Minnesota, Missouri, Nebraska, North Dakota, South Dakota
replace region4_childhood = 2 if inlist(state_childhood, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland,
*     North Carolina, South Carolina, Virginia, West Virginia, Alabama, Kentucky,
*     Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
replace region4_childhood = 3 if inlist(state_childhood, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
* 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
*     California, Oregon, Washington, plus Alaska (2) and Hawaii (15)
replace region4_childhood = 4 if inlist(state_childhood, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

label variable region4_childhood "Region R grew up"

******************************************
* Region R currently resides in * 
******************************************

* Current region of residence: REGION codes 1-4 are carried over unchanged
* (1 Northeast, 2 Midwest, 3 South, 4 West); any other value is left missing.
generate region4 = REGION if inlist(REGION, 1, 2, 3, 4)

* One shared value label for the three region variables.
label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
label values region4 region4_born region4_childhood region_l
foreach regvar in region4_born region4_childhood region4 {
	tabulate `regvar', missing
}

************************************************
* Whether R has moved state/region since birth * 
************************************************

* moved_state: 1 when M497B equals 2. It is defined only for respondents flagged
* as native born (foreignborn == 0) who have a non-missing M497B.
gen moved_state = (M497B==2) if foreignborn<1 & M497B~=. 
tab moved_state, m

* moved_region: 1 when the current region differs from the region of birth or
* from the region R grew up in.
gen moved_region = (region4~=region4_born | region4~=region4_childhood)

/*Correct moved_region for when region4_born==. OR region4_childhood==.
  A missing value never equals a known region, so the comparison above returns 1
  whenever one side is missing. The replaces below undo that where only one of
  the two origin regions is known and it matches the current region, and blank
  the flag where neither origin region is known.*/
replace moved_region =. if (region4_born==. & region4_childhood==.) 
replace moved_region =0 if (region4==region4_born & region4_childhood==. & region4~=.)
replace moved_region =0 if (region4==region4_childhood & region4_born==. & region4~=.)
* Fix: if the current region itself is missing, a move cannot be determined.
* The comparison above would otherwise leave the flag at 1 for those cases.
replace moved_region =. if region4==.
tab moved_region, m

*-------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------*

*****************
* Employment *
*****************

* Employed indicator: CHKPTX equal to 1; missing when CHKPTX is system missing.
generate employed = (CHKPTX == 1) if CHKPTX != .
tabulate employed, missing

* Self-employment indicator. The majority of respondents have IRSELF == 999996;
* these appear to be employed but not self-employed, so they are coded 0.
generate selfemployed = (IRSELF < 999996) if !missing(IRSELF)
tabulate selfemployed, missing

*******************************
* Education-Respondent *
*******************************

* Respondent's completed schooling (COMPLED), temporarily named higrade.
ren COMPLED higrade 

* eduR: attainment category (0-6) derived from completed years of schooling.
gen eduR= 0 if higrade==0 
replace eduR=1 if higrade>=1 & higrade<8  /* some grade school */ 
replace eduR=2 if higrade==8  /* completed 8th grade */ 
replace eduR=3 if higrade>8 & higrade<12  /* some HS */ 
replace eduR=4 if higrade==12 /* 4 years of HS */ 
replace eduR=5 if higrade>12 & higrade<16  /* 1-3 years of college */ 
replace eduR=6 if higrade>15 & higrade<.  /* 4 or more years of college (like BA) */ 
tab eduR, m
			
rename higrade yrsschool 
label var yrsschool "Years of school" 	
tab yrsschool, m nol 

* yrsschool_bin: one representative number of years per attainment category.
* Values above 20 are left missing here.
gen yrsschool_bin=. 
replace yrsschool_bin = 0 if yrsschool==0 
replace yrsschool_bin = 6 if yrsschool>0 & yrsschool<8  
replace yrsschool_bin = 8 if yrsschool==8 
replace yrsschool_bin = 10 if yrsschool>8 & yrsschool<12 
replace yrsschool_bin = 12 if yrsschool==12 
replace yrsschool_bin = 14 if yrsschool>12 & yrsschool<16 
replace yrsschool_bin = 16 if yrsschool>=16 & yrsschool<=20 
tab yrsschool_bin , m
label var yrsschool_bin "Years of school, binned" 
	
* High-school and college indicators, missing when eduR is missing.
* Fix: the conditions now name eduR in full. The original "edu<." resolved to
* eduR only through variable-name abbreviation, which fails when abbreviation
* is switched off and becomes ambiguous once other edu* variables exist.
gen hs_ed = eduR>=4 if eduR<. 
tab hs_ed, m
label var hs_ed "HS educated" 

gen coll_ed = eduR>=6 if eduR<.
tab coll_ed, m 
label var coll_ed "Coll educated" 

*********************
* Education-Parents *
*********************

* Working copies of the parents' schooling items.
generate dad_ed_raw = M501
generate mom_ed_raw = M502

* Attainment category (0-6) for each parent, from raw years of schooling.
foreach parent in mom dad {
	local raw `parent'_ed_raw
	generate edu_`parent' = 0 if `raw' == 0              // none
	replace edu_`parent' = 1 if `raw' > 0 & `raw' < 8    // some grade school
	replace edu_`parent' = 2 if `raw' == 8               // completed 8th grade
	replace edu_`parent' = 3 if `raw' > 8 & `raw' < 12   // some HS
	replace edu_`parent' = 4 if `raw' == 12              // 4 years of HS
	replace edu_`parent' = 5 if `raw' > 12 & `raw' < 16  // some college
	replace edu_`parent' = 6 if inlist(`raw', 16, 17)    // college
	* Raw code 25 is a GED, which is coded as high school.
	replace edu_`parent' = 4 if `raw' == 25
	label variable edu_`parent' "Educational categories for `parent'"
}

* Binned years of schooling plus HS / college indicators for each parent.
* Categories 1-6 map to 6, 8, 10, 12, 14 and 16 years; category 0 stays 0.
foreach parent in mom dad {
	generate edu_`parent'_bin = 0 if edu_`parent' == 0
	local bin_years 6 8 10 12 14 16
	forvalues cat = 1/6 {
		local yrs : word `cat' of `bin_years'
		replace edu_`parent'_bin = `yrs' if edu_`parent' == `cat'
	}
	tabulate edu_`parent'_bin, missing
	label variable edu_`parent'_bin "`parent' Years of school from bins"

	generate `parent'_hs_ed = edu_`parent' >= 4 if edu_`parent' < .
	tabulate `parent'_hs_ed, missing

	generate `parent'_coll_ed = edu_`parent' >= 6 if edu_`parent' < .
	tabulate `parent'_coll_ed, missing

	label variable `parent'_hs_ed "`parent' HS educated"
	label variable `parent'_coll_ed "`parent' College educated"
}

* Turn the raw items into years-of-school variables, with a GED (25) counted as
* 12 years.
foreach parent in dad mom {
	replace `parent'_ed_raw = 12 if `parent'_ed_raw == 25
	rename `parent'_ed_raw yrsschool_`parent'
	tabulate yrsschool_`parent', missing
}

************
* Siblings *
************

*# of siblings
* R_num_siblings adds up three sibling counts (M65, M72, M73), using whichever
* of them are available. M65 counts only when it is below 96.
* NOTE(review): values of 96 and above in M65 are presumably non-substantive
* codes -- confirm against the codebook.
	replace M72 =0 if M72==. & M71==2 //replace as 0 to account for skip logic of survey
	replace M73 =0 if M73==. & M71==2 //replace as 0 to account for skip logic of survey
	
	* All three components available.
	gen R_num_siblings = M65 + M72 + M73 if M65<96 & M72!=. & M73!=.
	* M65 usable, with one or both of M72 / M73 missing.
	replace R_num_siblings = M65 if M65<96 & M72==. & M73==.
	replace R_num_siblings = M65 + M72 if M65<96 & M72!=. & M73==.
	replace R_num_siblings = M65 + M73 if M65<96 & M72==. & M73!=.
	* M65 not usable: fall back on whatever M72 / M73 provide. If neither is
	* available the count stays missing.
	replace R_num_siblings = M72 if M65>=96 & M72!=. & M73==.
	replace R_num_siblings = M72 + M73 if M65>=96 & M72!=. & M73!=.
	replace R_num_siblings = M73 if M65>=96 & M72==. & M73!=.
	tab R_num_siblings, m // # of siblings variable accounts for full brothers/sisters and half/step siblings with whom R grew up.
	label var R_num_siblings "# of R's siblings"
	
*# brothers--not available.
*# sisters--not available.

*****************
* Own fertility *
*****************

/* The following variables are not available due to inconsistencies between several relevant variables.
   R_num_girls_living 
   R_num_boys_living  
   Flag for indeterminate (high) # of boys
   Flag for indeterminate (high) # of girls
*/

* # of living kids: count adopted, biological, and step children

* Adopted children. There are twelve slots, M203P01 through M203P12, with a
* value of 95 marking a deceased child. For each slot:
*   R_adoptedchild_k      = 1 when the slot holds a value below 95; set to 0
*                           when the slot is missing and M202 (number ever
*                           adopted) is 0
*   R_dead_adoptedchild_k = 1 when the slot equals 95, defined wherever
*                           R_adoptedchild_k is non-missing
	forvalues slot = 1/12 {
		local ss = string(`slot', "%02.0f")

		generate R_adoptedchild_`slot' = (M203P`ss' < 95) if M203P`ss' < .
		tabulate M202 if M203P`ss' == ., missing
		replace R_adoptedchild_`slot' = 0 if R_adoptedchild_`slot' == . & M202 == 0
		tabulate R_adoptedchild_`slot', missing

		generate R_dead_adoptedchild_`slot' = (M203P`ss' == 95) if R_adoptedchild_`slot' < .
		tabulate R_dead_adoptedchild_`slot', missing
	}

	* Row totals across the twelve slots; a row with no information stays missing.
	egen R_numadoptedkids_living = rowtotal(R_adoptedchild_*), missing
	egen R_numadoptedkids_dead = rowtotal(R_dead_adoptedchild_*), missing

	rename M202 R_numadoptedkids_ever

* Stepchildren: CHKPTT is used as the count, and the value 2 is flagged as an
* indeterminate number.
	rename CHKPTT R_numstepkids_ever

	generate flag_2plusstepkids = (R_numstepkids_ever == 2) if R_numstepkids_ever < .
	tabulate flag_2plusstepkids, missing
	label variable flag_2plusstepkids "Dummy =1 if R has indeterminate (but >0) # of stepchildren"

* Biological children: ever born (M204) and deceased (M211NUM).
	rename M204 R_numbiokids_ever
	rename M211NUM R_numbiokids_dead
	* With no biological children there can be no deceased ones.
	replace R_numbiokids_dead = 0 if R_numbiokids_dead == . & R_numbiokids_ever == 0
	label define M211NUM 0 "All living or no bio kids", modify

	* Living = ever minus deceased. When the deceased count is missing, all
	* children are taken to be living (only the deceased count ever has missings).
	generate R_numbiokids_living = R_numbiokids_ever if R_numbiokids_dead == .
	replace R_numbiokids_living = R_numbiokids_ever - R_numbiokids_dead if R_numbiokids_dead < .
	tabulate R_numbiokids_living, missing
	
//Add together the 3 types of living children
* Each replace below handles one combination of which of the three component
* counts (adopted, step, biological) are non-missing, and adds up only the
* available ones. If all three are missing the total stays missing.
	gen R_numkids_living =.
	replace R_numkids_living = R_numadoptedkids_living + R_numstepkids_ever + R_numbiokids_living if (R_numadoptedkids_living!=. & R_numstepkids_ever!=. & R_numbiokids_living!=.) //note: only R_numadoptedkids_living and R_numstepkids_ever have missings
	replace R_numkids_living = R_numbiokids_living if (R_numadoptedkids_living==. & R_numstepkids_ever==. & R_numbiokids_living!=.)
	replace R_numkids_living = R_numbiokids_living + R_numstepkids_ever if (R_numadoptedkids_living==. & R_numstepkids_ever!=. & R_numbiokids_living!=.)
	replace R_numkids_living = R_numbiokids_living + R_numadoptedkids_living if (R_numadoptedkids_living!=. & R_numstepkids_ever==. & R_numbiokids_living!=.)
	replace R_numkids_living = R_numadoptedkids_living if (R_numadoptedkids_living!=. & R_numstepkids_ever==. & R_numbiokids_living==.)
	replace R_numkids_living = R_numstepkids_ever if (R_numadoptedkids_living==. & R_numstepkids_ever!=. & R_numbiokids_living==.)
	replace R_numkids_living = R_numstepkids_ever + R_numadoptedkids_living if (R_numadoptedkids_living!=. & R_numstepkids_ever!=. & R_numbiokids_living==.)
	
	tab R_numkids_living, m
	label var R_numkids_living "# of R's kids who are living"
	
* # of deceased kids (i.e., adopted or bio--note: cannot see in the data whether any stepkids died)
* Sum of the adopted and biological deceased counts, using whichever is available.
	gen R_numkids_dead = R_numadoptedkids_dead + R_numbiokids_dead if (R_numadoptedkids_dead!=. & R_numbiokids_dead!=.)
	replace R_numkids_dead = R_numadoptedkids_dead if (R_numadoptedkids_dead!=. & R_numbiokids_dead==.)
	replace R_numkids_dead = R_numbiokids_dead if (R_numadoptedkids_dead==. & R_numbiokids_dead!=.)
	tab R_numkids_dead, m 
	label var R_numkids_dead "# of R's kids who are deceased"

* # of kids ever (i.e., living + deceased)
* Falls back on the single available component when the other one is missing.
	gen R_numkids_ever =. 
	replace R_numkids_ever = R_numkids_living + R_numkids_dead if (R_numkids_living!=. & R_numkids_dead!=.)
	replace R_numkids_ever = R_numkids_living if (R_numkids_living!=. & R_numkids_dead==.)
	replace R_numkids_ever = R_numkids_dead if (R_numkids_living==. & R_numkids_dead!=.)
	tab R_numkids_ever, m
	label var R_numkids_ever "# of kids that R has ever had (living or deceased)"

* Dummy: Has R ever had kids?
* 1 when the ever-had count is non-zero; missing when that count is missing.
	gen R_kids_ever = R_numkids_ever!=0 if R_numkids_ever<.
	tab R_kids_ever,m 
	label var R_kids_ever "Dummy=1 if R has ever had kids"

*Dummy: Does R have kids right now?
* Fix: guard against a missing living-kids count, as R_kids_ever does above.
* A missing value is "not equal to 0", so without the guard an unknown count
* would be coded as 1. The variable also gets a label now.
	gen R_kids_now = R_numkids_living!=0 if R_numkids_living<.
	tab R_kids_now,m 
	label var R_kids_now "Dummy=1 if R currently has living kids"

*********************************
* # of persons in R's household *
*********************************

* Children of any age living in R's household: sum of three roster counts.
*   LSTA1NUM = number of children (bio, step, adopted) on hh roster, age 4 or younger
*   LSTA2NUM = number of children (bio, step, adopted) on hh roster, age 5-18
*   LSTDNUM  = number of children (bio, step, adopted) on hh roster, 19+
	generate R_totnumkids_livinginhh = LSTA1NUM + LSTA2NUM + LSTDNUM
	tabulate R_totnumkids_livinginhh, missing
	label variable R_totnumkids_livinginhh "Total # of kids (of any age) living in R's hh (R0050100)"

* Flag respondents whose count of kids aged 5-18 is indeterminate (value 9).
	generate flag_9plus_kids5to18 = (LSTA2NUM == 9)
	tabulate LSTA2NUM, missing
	tabulate flag_9plus_kids5to18, missing
	label variable flag_9plus_kids5to18 "Dummy =1 if R has an indeterminate (9+) # of kids aged 5-18"

* Children aged 0-17 living in R's household.
	rename RELKID R_totnumkids_0to17_livinginhh
	label variable R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh"

* Household size. RELADUL is the number of related persons age 18 and older in
* the household; R does not appear to be counted, so R is added separately.
	generate R_hhsize_minusR = R_totnumkids_0to17_livinginhh + RELADUL
	tabulate R_hhsize_minusR, missing

	generate R_hhsize_plusR = R_hhsize_minusR + 1
	tabulate R_hhsize_plusR, missing

	label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
	label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"

******************
* Unions *
******************

*Unavailable

*******************
* Veterans *
*******************

/*Note: Veteran status is measured through 
		(1) R reporting end date of last separation from active duty
		or 
		(2) R reporting participation in a veterans group.

		While imperfect, the resulting share of men aged 30-50 who served 
		makes sense for the 1940/1950 cohort (~32%). */
* veteran = 1 when E217C is between 2 and 5 or M527M is below 9995.
* It is left missing when both items are system missing.
generate veteran = (inrange(E217C, 2, 5) | M527M < 9995) if !(E217C == . & M527M == .)
tabulate veteran, missing

/*------------------------------------------------------------------------------------------

Crosswalking 1987-1988 NSFH occupations to 1950 ANES occupations for fathers/mothers

------------------------------------------------------------------------------------------*/

***********************
*		      *
*    Crosswalking     *
* 		      *
***********************
* Parents' occupation items.
rename M500A dad_occ
rename M504A mom_occ

* Attach the crosswalked occupation code for each parent in turn. The crosswalk
* is keyed on census1980 and delivers its result in a column named fatheroccej.
* For the mother pass that column is renamed to motheroccej before merging.
foreach parent in dad mom {

	* Temporary merge key holding this parent's occupation code.
	generate census1980 = `parent'_occ
	label variable census1980 "==`parent'_occ (renamed to facilitate a merge)"

	* Stage a copy of the crosswalk without disturbing the data in memory.
	preserve
		use "../Crosswalks/Crosswalk_1980Census_toANES.dta", clear
		if ("`parent'" == "mom") {
			rename fatheroccej motheroccej
		}
		tempfile occ_xwalk
		save `occ_xwalk'
	restore

	sort census1980
	merge m:1 census1980 using `occ_xwalk'

	* Respondents without a crosswalk match must have a missing code or code 996.
	assert (census1980 == . | census1980 == 996) if _merge == 1
	drop if _merge == 2
	drop _merge census1980
}

** Dummies for head of household when R was growing up

/* Coding, following other surveys:
   --headofhh_father = 1 if R lived with both parents when growing up (M44==1)
     or reports a biological, step or adoptive father; 0 otherwise.
   --headofhh_mother = 1 if R reports a biological, step or adoptive mother but
     no father of any kind, and did not live with both parents; 0 otherwise.
   --headofhh_othermale / headofhh_otherfemale: due to lack of information in
     the survey it is not possible to tell when the hh head was someone other
     than R's parents, so these are only ever set to 0 (when a parent is
     identified as head) and are missing otherwise.
*/

	generate headofhh_father = (M44 == 1 | inlist(1, BIOF16, STEPF16, ADOPF16))
	tabulate headofhh_father, missing

	generate headofhh_mother = (M44 != 1 & inlist(1, BIOM16, STEPM16, ADOPM16) & BIOF16 != 1 & STEPF16 != 1 & ADOPF16 != 1)
	tabulate headofhh_mother, missing

	generate headofhh_othermale = 0 if M44 == 1 | headofhh_father == 1 | headofhh_mother == 1
	tabulate headofhh_othermale, missing

	generate headofhh_otherfemale = 0 if M44 == 1 | headofhh_father == 1 | headofhh_mother == 1
	tabulate headofhh_otherfemale, missing

	label variable headofhh_father "Head of hh when R was growing up was R's father"
	label variable headofhh_mother "Head of hh when R was growing up was R's mother"
	label variable headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label variable headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"

	* Alternate father-as-head dummy. In other surveys the father is assumed to
	* be head when a parent's occupation is reported but living arrangements are
	* not. This is not an issue for NSFH, so the variable is a plain copy.
	generate headofhh_father_imputed = headofhh_father
	label variable headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	tabulate headofhh_father_imputed, missing
	

**************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials * 
**************************************************************************
/*"Type of work organization" (i.e., private sector employer, government, or self-employed) 
not available for fathers.*/

*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************
* father_notworking: 0 when a crosswalked occupation exists; 1 when it is
* missing and dad_occ is 992 or 995; missing otherwise.
generate father_notworking = 0 if fatheroccej != .
replace father_notworking = 1 if fatheroccej == . & inlist(dad_occ, 992, 995)
tabulate father_notworking, missing

* mother_notworking: same construction, with mom_occ equal to 996 as the
* not-working code.
generate mother_notworking = 0 if motheroccej != .
replace mother_notworking = 1 if motheroccej == . & mom_occ == 996
tabulate mother_notworking, missing

* Father was a farm laborer or operator (crosswalked codes 71 and 81).
* Missing when the father's crosswalked occupation is missing.
generate fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej != .
tabulate fatherfarm, missing

/*------------------------------------------------------------------------------------------

Crosswalking 1987-1988 NSFH occupations to 1950 ANES occupations for adult children (R)

The crosswalk delivers its result in a column named fatheroccej. The father's
code already occupies that name, so it is parked under a temporary name, the
crosswalk is merged on R's own occupation, and the names are sorted out at the
end of this section.

------------------------------------------------------------------------------------------*/

//Rename to allow crosswalk to be merged again
rename fatheroccej father_occ1950ej 

ren M540A Rocc
label var Rocc "R occ, NSFH"
gen census1980= Rocc /*now = adult child's occ */
label var census1980 "==Rocc_occ (renamed to facilitate a merge)" 

* Fix: the data used to be saved to a tempfile at this point, but that file was
* never read again, so the write has been removed.
sort census1980 

merge m:1 census1980 using "../Crosswalks/Crosswalk_1980Census_toANES.dta"
* Respondents without a crosswalk match must have a missing occupation code.
assert census1980==. if _merge==1
drop if _merge==2
drop _merge 

*************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials * 
*************************************************************************
* At this point fatheroccej holds R's own crosswalked code (see header).
replace fatheroccej=21 if fatheroccej==28 & selfemployed==1

*Rename to match the variable names in other data files 
rename fatheroccej occRej 
rename father_occ1950ej fatheroccej 

/*------------------------------------------------------------------------------------------

				Family Income							

------------------------------------------------------------------------------------------*/

* Log of reported family income, before any binning.
generate lnfaminc_nobin = ln(IFTOT)

* Binned family income. Each interior bin [low, high) gets its midpoint.
* Exceptions:
*   bottom bin (below 4,000)      -> 0.75 times its top value
*   top bin (60,000 or more)      -> 1.25 times its bottom value (open-ended)
* The three lists are parallel: the b-th bin runs from lows[b] up to (but not
* including) highs[b] and is assigned mids[b].
local lows  4000 6000 8000  12500 17500 22500 30000 40000
local highs 6000 8000 12500 17500 22500 30000 40000 60000
local mids  5000 7000 10250 15000 20000 26250 35000 50000

generate fam_inc = .
replace fam_inc = 0.75*4000 if IFTOT < 4000
forvalues b = 1/8 {
	local lo  : word `b' of `lows'
	local hi  : word `b' of `highs'
	local mid : word `b' of `mids'
	replace fam_inc = `mid' if IFTOT >= `lo' & IFTOT < `hi'
}
replace fam_inc = 1.25*60000 if IFTOT >= 60000 & IFTOT != .
tabulate fam_inc, missing
label variable fam_inc "Family income, binned (based on midpoints of each bin)"

* Bottom- and top-coding flags. The suffix "_son" matches the variable names in
* other datasets; all respondents (male and female) get a value.
generate bottomcoded_son = (fam_inc == 0.75*4000) if !missing(fam_inc)
tabulate bottomcoded_son, missing

generate topcoded_son = (fam_inc == 1.25*60000) if !missing(fam_inc)
tabulate topcoded_son, missing

label variable bottomcoded_son "Respondent family income, bottom coded"
label variable topcoded_son "Respondent family income, top coded"

* Convert to 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0
* All income questions ask about 1986 income, so the 1986 CPI is the base.
generate CPI1950 = 24.1
generate CPI1986 = 109.6

generate fam_inc_real = fam_inc * (CPI1950/CPI1986)
label variable fam_inc_real "Binned Family income, in 1950 dollars"

generate lnfaminc = ln(fam_inc_real)
label variable lnfaminc "Logged family income, binned and real"

/*------------------------------------------------------------------------------------------

									Birth cohorts							

------------------------------------------------------------------------------------------*/

/*Note: It's not sufficient to use year of interview and age of R to calculate dob 
        because the latter will be age of R at the interview date. For instance, if R is
        interviewed in February of 1987 and has a birthday in April, any calculation of 
        dob = interview year-age will miscalculate birth year by 1 year. Will use month of 
        interview to correct this. */

* Survey year: MYEAR holds two-digit years (87 and 88).
generate year = 1900 + MYEAR
label variable year "Survey year"

* First pass at birth year, ignoring whether R's birthday had already occurred
* in the interview year.
generate dob = year - age
tabulate dob, missing

* Birth month from the "century months" coding of R's birthdate (M485M).
* A value of 0 means "born before 1900" and is excluded. A remainder of 0 after
* dividing by 12 corresponds to December.
generate MBIRTH = cond(mod(M485M, 12) == 0, 12, mod(M485M, 12)) if M485M != 0
tabulate MBIRTH, missing

* If the interview month falls before the birth month, R had not yet had that
* year's birthday, so the birth year is one year earlier.
replace dob = dob - 1 if MMONTH < MBIRTH & MBIRTH != .
tabulate dob, missing
label variable dob "Year of birth"

* Decade of birth (1930s, 1940s, 1950s); other birth years are left missing.
generate decade = .
foreach start of numlist 1930(10)1950 {
	replace decade = `start' if inrange(dob, `start', `start' + 9)
}
tabulate decade, missing
label variable decade "Decade of birth"

* One dummy per decade (decade_1, decade_2, ...).
tabulate decade, generate(decade_)

/*------------------------------------------------------------------------------------------

									Interactions			

------------------------------------------------------------------------------------------*/ 
	
	* Variables to demean. The list is kept in a global, as before.
	global institution_list "veteran black hs_ed coll_ed"

* Demean the variables that we will use
* For each listed variable, create <var>_dm = <var> minus its sample mean
* (unweighted, over the observations currently in memory).
	foreach var in $institution_list {
		summarize `var'
		* r(mean) is used directly rather than through macro expansion, so
		* the mean is not passed through a text representation.
		gen `var'_dm = `var' - r(mean)
		local temp : variable label `var'
		* Fix: fall back on the variable name when the source variable has
		* no label. The label would otherwise read ", demeaned".
		if `"`temp'"' == "" local temp "`var'"
		label var `var'_dm "`temp', demeaned"
	}

/*------------------------------------------------------------------------------------------

										Save			

------------------------------------------------------------------------------------------*/ 

* Flag respondents without a usable real family income.
generate faminc_missing = (fam_inc_real == .)
label variable faminc_missing "Family income missing"

* MCASEID is the unique identifier.
rename MCASEID id_nsfh
label variable id_nsfh "R ID (unique identifier)"

* Check for repeated IDs (none were reported).
duplicates report id_nsfh if id_nsfh != .

* Drop the raw survey items that are no longer needed.
drop M1* M2* M3* M4* M5* M6* M7* M8* M9*
drop BIO* STEP* ADOP* GRAN* OREL* FOST* INST* OTHR* ONOWN* CHKPT* LST* LISTA* MFOCAL*
drop MOB* E1STAT-E220NUM SECTYPE-S207I4 S209-T101O K1-K14 R0* R10-R52 census1980

* Shrink storage types, put the ID and weight first, and save.
compress
rename AWEIGHT weight_nsfh
sort id_nsfh
order id_nsfh weight_nsfh
save ./output/fam_hh_cleaned, replace

